Data

# Load the BOCC results stored under 'Results/BOCC_Results'.
# read_BOCCResults() is defined outside this section.
bocc <- read_BOCCResults(path = 'Results/BOCC_Results')

# Names of the statistics handed to create_stats_df() below.
stats2plot <- c(
  'cluster_size',
  'gene_ratio',
  'HPO_ratio',
  'num_sig_go_enrichment_terms',
  'max_norm_cell_type_specificity',
  'max_norm_cell_type_comma_sep_string',
  'max_norm_disease_specificity',
  'max_norm_disease_comma_sep_string'
)

# Assemble the statistics table (create_stats_df() is defined elsewhere), then
# round the two ratio columns to three decimal places.
df <- create_stats_df(bocc, stats2plot)
df <- dplyr::mutate(
  df,
  dplyr::across(c('gene_ratio', 'HPO_ratio'), ~round(.x, 3))
)

Cluster Sizes

# Per-method summary of cluster_size: row count (n) plus the minimum, mean,
# standard deviation, maximum and total.
df %>%
  dplyr::group_by(., method) %>%
  dplyr::summarize(
    .,
    n = dplyr::n(),  # rows in the group; same value as length(cluster_id)
    min_m = min(cluster_size),
    mean_m = mean(cluster_size),
    sd_m = sd(cluster_size),
    max_m = max(cluster_size),
    sum_m = sum(cluster_size),
    .groups = 'drop'  # make the ungrouped result explicit
  )
## # A tibble: 4 x 7
##   method                    n min_m mean_m   sd_m max_m sum_m
## * <chr>                 <int> <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 cesna_june_22_2021       10    15   71     57.6   201   710
## 2 greedy_june_22_2021       5     3 4274.  5834.  11161 21369
## 3 infomap_june_22_2021     64     2   24.9   53.7   378  1595
## 4 walktrap_june_22_2021   150     1  147.   917.   8821 22088
  # %>%
  # kableExtra::kable(.,
  #                   format = 'latex',
  #                   digits = 2,
  #                   booktabs = TRUE) %>%
  # kableExtra::kable_styling(position = 'center') %>%
  # kableExtra::add_header_above(., list(' ' = 2, 'cluster size' = 5))

Cell and Disease Specificity

On a natural scale…

# Scatter plot of cell-type specificity against disease specificity, one point
# per row of df with cluster_size > 1, coloured by method.
#
# The label/label2..label7 aesthetics are not drawn by geom_point();
# presumably they are carried along so plotly::ggplotly(p1) (called further
# down) can show them in the hover tooltip -- confirm.
p1 <- ggplot(
  df %>%
    dplyr::filter(., cluster_size > 1),
  aes(
    x = max_norm_cell_type_specificity,
    y = max_norm_disease_specificity,
    color = method,
    label = cluster_id,
    label2 = cluster_size,
    label3 = num_sig_go_enrichment_terms,
    label4 = gene_ratio,
    label5 = HPO_ratio,
    label6 = max_norm_cell_type_comma_sep_string,
    label7 = max_norm_disease_comma_sep_string
  )
) +
  # A seeded position_jitter() keeps the jittered point positions identical
  # every time the plot is rendered (printed, saved with ggsave(), converted by
  # plotly); the unseeded 'jitter' shorthand may redraw the noise per render.
  geom_point(alpha = 0.5, position = position_jitter(seed = 42)) +
  scale_color_brewer('method', palette = 'Set1') +
  labs(x = 'cell type specificity', y = 'disease type specificity') +
  theme_bw() +
  theme(legend.position = 'bottom',
        legend.direction = 'horizontal',
        legend.box = 'vertical')
p1

# Save the natural-scale figure. The plot is passed explicitly so the output
# does not depend on whichever plot ggplot2 last recorded (the default,
# last_plot()).
ggsave('AnalyzeResults/figs/bocc_results.png', plot = p1, width = 5, height = 5)

On a log-log scale…

# Log-log variant of p1: both axes get a log10 scale with breaks at powers of
# ten, tick labels formatted as 10^x, minor breaks from log10_minor_break()
# (defined outside this section), and log tick marks.
p2 <- p1 +
  scale_x_log10(
    name = bquote(~log[10]('cell specificity')),
    breaks = scales::trans_breaks("log10", function(exponent) 10^exponent),
    minor_breaks = log10_minor_break(),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  scale_y_log10(
    name = bquote(~log[10]('disease specificity')),
    breaks = scales::trans_breaks("log10", function(exponent) 10^exponent),
    minor_breaks = log10_minor_break(),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  annotation_logticks()
p2

# Save the log-log figure. The plot is passed explicitly so the output does not
# depend on whichever plot ggplot2 last recorded (the default, last_plot()).
ggsave('AnalyzeResults/figs/bocc_results_loglog.png', plot = p2, width = 5, height = 5)

As a widget…

# Re-apply the bottom legend position to p1, then render it as an interactive
# plotly widget.
p1 <- p1 + theme(legend.position = 'bottom')
plotly::ggplotly(p = p1)

Deep Dive

We’ll look into the communities with high specificity: those whose cell-type or disease specificity is \(> 0.50\), excluding single-member clusters.

# Keep multi-member clusters whose cell-type or disease specificity exceeds
# 0.5, and move the two specificity columns directly after method:cluster_size.
deep_dive <- df %>%
  dplyr::filter(
    cluster_size > 1,
    max_norm_cell_type_specificity > 0.5 | max_norm_disease_specificity > 0.5
  ) %>%
  dplyr::select(
    method:cluster_size,
    max_norm_cell_type_specificity,
    max_norm_disease_specificity,
    dplyr::everything()
  )

# Show the selection as an interactive table, five rows per page.
DT::datatable(deep_dive, options = list(pageLength = 5))

Walktrap

I’ll be investigating the clusters identified by cluster_id 17, 18, 41, 84.

Starting with cluster 17:

# Summarise (and plot) walktrap cluster 17. summarize_cluster() and G are
# defined outside this section.
summarize_cluster(
  G = G,
  method = 'walktrap',
  cluster_id = 17,
  plot = TRUE
)

## $sG
## IGRAPH a4cd54a UN-- 6 5 -- 
## + attr: name (v/c), node (v/n), gene (v/n), communities (v/x)
## + edges from a4cd54a (vertex names):
## [1] ADGRG1    --HP:0040194 ADGRG1    --HP:0011147 ADGRG1    --HP:0007095
## [4] ADGRG1    --HP:0005684 HP:0002803--HP:0005684
## 
## $n
## [1] 6
## 
## $m
## [1] 5

Infomap

I’ll be investigating the clusters identified by cluster_id 5, 17, 30, 37, 38, 42, 46, 49, 52, 53, 54, 57.